In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
stop=pd.read_csv('plant_vase1.CSV')
In [3]:
stop.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4117 entries, 0 to 4116
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   year       4117 non-null   int64  
 1   month      4117 non-null   int64  
 2   day        4117 non-null   int64  
 3   hour       4117 non-null   int64  
 4   minute     4117 non-null   int64  
 5   second     4117 non-null   int64  
 6   moisture0  4117 non-null   float64
 7   moisture1  4117 non-null   float64
 8   moisture2  4117 non-null   float64
 9   moisture3  4117 non-null   float64
 10  moisture4  4117 non-null   float64
 11  irrgation  4117 non-null   bool   
dtypes: bool(1), float64(5), int64(6)
memory usage: 358.0 KB
In [4]:
stop.dtypes
Out[4]:
year           int64
month          int64
day            int64
hour           int64
minute         int64
second         int64
moisture0    float64
moisture1    float64
moisture2    float64
moisture3    float64
moisture4    float64
irrgation       bool
dtype: object
In [5]:
print(list(stop))
['year', 'month', 'day', 'hour', 'minute', 'second', 'moisture0', 'moisture1', 'moisture2', 'moisture3', 'moisture4', 'irrgation']
In [6]:
stop.drop(['irrgation','year','month'],axis=1,inplace=True)
In [7]:
duplicate_rows_stop = stop[stop.duplicated()]
In [8]:
stop.head()
Out[8]:
day hour minute second moisture0 moisture1 moisture2 moisture3 moisture4
0 6 22 16 11 0.70 0.64 0.73 0.40 0.02
1 6 22 17 11 0.70 0.64 0.71 0.39 0.02
2 6 22 18 11 0.69 0.63 0.70 0.39 0.02
3 6 22 19 11 0.69 0.63 0.70 0.39 0.02
4 6 22 20 12 0.69 0.62 0.69 0.39 0.02
In [9]:
stop.isnull().sum()
Out[9]:
day          0
hour         0
minute       0
second       0
moisture0    0
moisture1    0
moisture2    0
moisture3    0
moisture4    0
dtype: int64
In [10]:
sns.lmplot(x="day", y="moisture4", hue="moisture4",palette="rocket", data=stop)
plt.show()
sns.lmplot(x="hour", y="moisture4", hue="moisture4",palette="icefire", data=stop)
plt.show()
sns.lmplot(x="minute", y="moisture4", hue="moisture4",palette="coolwarm",data=stop)
plt.show()
sns.lmplot(x="hour", y="moisture4", hue="moisture4",palette="cubehelix",data=stop)
plt.show()

plt.show()
In [11]:
sns.lmplot(x="moisture0", y="moisture4", hue="moisture4",palette="YlOrBr", data=stop)
plt.show()
sns.lmplot(x="moisture1", y="moisture4", hue="moisture4",palette="Blues", data=stop)
plt.show()
sns.lmplot(x="moisture2", y="moisture4", hue="moisture4",palette="viridis", data=stop)
plt.show()
sns.lmplot(x="moisture3", y="moisture4", hue="moisture4",palette="rocket_r", data=stop)
plt.show()

plt.show()
In [12]:
stop[stop.columns[1:]].corr()['moisture3'][:]
Out[12]:
hour        -0.087577
minute      -0.001073
second       0.120826
moisture0    0.913997
moisture1    0.676025
moisture2    0.862147
moisture3    1.000000
moisture4    0.035159
Name: moisture3, dtype: float64
In [13]:
corr = stop.corr()
corr.style.background_gradient(cmap='coolwarm')
Out[13]:
  day hour minute second moisture0 moisture1 moisture2 moisture3 moisture4
day 1.000000 -0.216850 -0.016147 -0.072125 -0.924280 -0.611622 -0.878232 -0.891575 -0.025887
hour -0.216850 1.000000 -0.001180 0.070989 -0.081659 -0.034199 -0.033951 -0.087577 0.018892
minute -0.016147 -0.001180 1.000000 -0.002409 -0.002609 -0.013826 -0.005886 -0.001073 0.001120
second -0.072125 0.070989 -0.002409 1.000000 0.093240 0.093260 0.091372 0.120826 -0.003285
moisture0 -0.924280 -0.081659 -0.002609 0.093240 1.000000 0.735426 0.943547 0.913997 0.024469
moisture1 -0.611622 -0.034199 -0.013826 0.093260 0.735426 1.000000 0.783724 0.676025 -0.004386
moisture2 -0.878232 -0.033951 -0.005886 0.091372 0.943547 0.783724 1.000000 0.862147 0.002836
moisture3 -0.891575 -0.087577 -0.001073 0.120826 0.913997 0.676025 0.862147 1.000000 0.035159
moisture4 -0.025887 0.018892 0.001120 -0.003285 0.024469 -0.004386 0.002836 0.035159 1.000000
In [14]:
X = stop.drop('moisture4',axis=1)
In [15]:
X.head()
Out[15]:
day hour minute second moisture0 moisture1 moisture2 moisture3
0 6 22 16 11 0.70 0.64 0.73 0.40
1 6 22 17 11 0.70 0.64 0.71 0.39
2 6 22 18 11 0.69 0.63 0.70 0.39
3 6 22 19 11 0.69 0.63 0.70 0.39
4 6 22 20 12 0.69 0.62 0.69 0.39
In [16]:
y = stop['moisture4']
y.head()
Out[16]:
0    0.02
1    0.02
2    0.02
3    0.02
4    0.02
Name: moisture4, dtype: float64
In [17]:
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

sc = StandardScaler()
X = sc.fit_transform(X)

X
Out[17]:
array([[-2.23301477,  1.59546095, -0.7775538 , ..., -0.54656403,
         1.04483935,  0.3766541 ],
       [-2.23301477,  1.59546095, -0.71979354, ..., -0.54656403,
         0.63193826,  0.32593835],
       [-2.23301477,  1.59546095, -0.66203328, ..., -0.99669488,
         0.42548771,  0.32593835],
       ...,
       [ 1.31927341,  1.15597797, -0.89307431, ..., -1.44682573,
        -1.84546831, -1.14481856],
       [ 1.31927341,  1.15597797, -0.83531405, ..., -1.44682573,
        -1.63901776, -1.14481856],
       [ 1.31927341,  1.15597797, -0.7775538 , ..., -0.99669488,
        -1.63901776, -1.19553432]])
In [18]:
from sklearn import metrics
X.shape
Out[18]:
(4117, 8)
In [19]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
In [20]:
y_test
Out[20]:
2648    0.02
843     0.02
2222    0.02
2413    0.02
3911    0.02
        ... 
3254    0.02
2459    0.02
2166    0.02
867     0.02
926     0.02
Name: moisture4, Length: 1236, dtype: float64
In [21]:
def predict(algorithm):
    model = algorithm.fit(X_train,y_train)
    print('Training Score: {}'.format(model.score(X_train,y_train)))
    print('Test Accuracy: {}'.format(model.score(X_test, y_test)))

    preds = model.predict(X_test)
    print('Predictions are: {}'.format(preds))
    print('\n')

    r2_score = metrics.r2_score(y_test,preds)
    print('r2_score is:{}'.format(r2_score))
   
    print('MAE:',metrics.mean_absolute_error(y_test,preds))
    print('MSE:',metrics.mean_squared_error(y_test,preds))
    print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test,preds)))
    sns.distplot(y_test-preds,color='red')
In [22]:
from sklearn.metrics import accuracy_score as score
In [23]:
from sklearn.linear_model import LinearRegression
In [24]:
predict(LinearRegression())
Training Score: 0.011281276597404633
Test Accuracy: 0.002788402154185632
Predictions are: [0.02048459 0.02100679 0.02061148 ... 0.02074936 0.02131462 0.02079831]


r2_score is:0.002788402154185632
MAE: 0.001477051083049631
MSE: 7.211835925786223e-06
RMSE: 0.002685486161905554
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
In [25]:
ln_model = LinearRegression()
ln_model.fit(X_train, y_train)
preds1 = ln_model.predict(X_test)
preds1
Out[25]:
array([0.02048459, 0.02100679, 0.02061148, ..., 0.02074936, 0.02131462,
       0.02079831])
In [26]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds1, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [27]:
from sklearn.ensemble import RandomForestRegressor
predict(RandomForestRegressor())
Training Score: 0.8634559358163412
Test Accuracy: 0.07568437859218236
Predictions are: [0.0203 0.0209 0.0201 ... 0.0227 0.0211 0.0241]


r2_score is:0.07568437859218236
MAE: 0.0011824433656958046
MSE: 6.684652103559864e-06
RMSE: 0.002585469416481244
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [28]:
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
preds2 = rf.predict(X_test)
preds2
Out[28]:
array([0.0201, 0.0211, 0.0209, ..., 0.0217, 0.0223, 0.025 ])
In [29]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds2, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [30]:
from sklearn.neighbors import KNeighborsRegressor

predict(KNeighborsRegressor())
Training Score: 0.18146928122792771
Test Accuracy: -0.19792981725695347
Predictions are: [0.02  0.02  0.022 ... 0.022 0.02  0.022]


r2_score is:-0.19792981725695347
MAE: 0.0013576051779935275
MSE: 8.663430420711972e-06
RMSE: 0.002943370588409141
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [31]:
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
preds3 = knn.predict(X_test)
preds3
Out[31]:
array([0.02 , 0.02 , 0.022, ..., 0.022, 0.02 , 0.022])
In [32]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds3, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [33]:
from sklearn.tree import DecisionTreeRegressor 

predict(DecisionTreeRegressor())
Training Score: 1.0
Test Accuracy: -0.4990903577926007
Predictions are: [0.02 0.02 0.02 ... 0.02 0.02 0.02]


r2_score is:-0.4990903577926007
MAE: 0.0010841423948220119
MSE: 1.0841423948220056e-05
RMSE: 0.0032926317662654073
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [34]:
dt = KNeighborsRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
Out[34]:
array([0.02 , 0.02 , 0.022, ..., 0.022, 0.02 , 0.022])
In [35]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds4, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()
In [36]:
from xgboost.sklearn import XGBRegressor

predict( XGBRegressor())
Training Score: 0.5397815666750336
Test Accuracy: 0.14051909973409826
Predictions are: [0.0207539  0.02072221 0.02062525 ... 0.02051513 0.02170599 0.02459675]


r2_score is:0.14051909973409826
MAE: 0.0012292668977481062
MSE: 6.215767292974362e-06
RMSE: 0.0024931440578061996
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning:

`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).

In [37]:
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
preds5 = xgb.predict(X_test)
preds5
Out[37]:
array([0.0207539 , 0.02072221, 0.02062525, ..., 0.02051513, 0.02170599,
       0.02459675], dtype=float32)
In [38]:
import plotly.express as px

fig = px.scatter(x=y_test, y=preds5, labels={'x': 'True Moisture', 'y': 'Predicted Moisture'})
fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max()
)
fig.show()